# This script loads the data for the main study

# Respondent-level data
main_resp <- read_rds("Data/main_resp_data.rds")

# Story evaluations
main_stories_data <- read_rds("Data/main_stories_data.rds")

# Story metadata (political direction, etc.), with a flag that separates
# story codes below 5000 from story codes above 5000.
# A code of exactly 5000 (or a missing code) matches neither branch -> NA.
main_stories_info <- mutate(
  read_csv("Data/main_stories_info.csv"),
  story_current = case_when(
    story_code > 5000 ~ "Current",
    story_code < 5000 ~ "Pre-selected"
  )
)

# Attach the metadata to the evaluations, leaving out the `story_text_eng`
# and `fake` columns. No `by` is supplied, so the join keys are whatever
# column names the two tables have in common.
main_stories_data <- main_stories_data |>
  left_join(select(main_stories_info, -story_text_eng, -fake))

# survey day = first 10 characters of the session id
# (compared against "YYYY-MM-DD" strings further down in this script)
main_resp <- mutate(
  main_resp,
  survey_day = str_sub(session_id, start = 1, end = 10)
)

# add individual-level variables (using state-run/independent sources, etc.)
#
# The three media-use dummies below share one pattern, built from the
# source_* indicators (compared against 0 and 1):
#   * NA if the respondent picked "none of the above" (no_sources_chosen == 1)
#   * 1  if at least one outlet in the group equals 1
#   * 0  only if every outlet in the group equals 0
#   * NA otherwise (case_when() treats an NA condition as "no match", so a
#     missing indicator with no outlet equal to 1 falls through to NA)
# The 0-branch must therefore list exactly the same outlets as the 1-branch.
main_resp <- main_resp |>
  mutate(
    # whether respondent uses any state-controlled media
    source_state_controlled_dummy = case_when(
      # set to NA if no responses to this question
      # (there was an option "none of the above" in the question)
      no_sources_chosen == 1 ~ NA_real_,
      no_sources_chosen == 0 &
        (source_Russia_1_24 == 1 | source_Vesti == 1 |
           source_TV1 == 1 | source_Lenta == 1 |
           source_Gazeta == 1 | source_TASS == 1 | source_Zvezda == 1 |
           source_NTV == 1 | source_RIA == 1 | source_Izvestia == 1 |
           source_RenTV == 1 | source_RT == 1 | source_Vzglyad == 1 |
           source_RG == 1 | source_Sputnik == 1 |
           source_KP == 1 | source_MK == 1) ~ 1,
      no_sources_chosen == 0 &
        source_Russia_1_24 == 0 & source_Vesti == 0 &
        source_TV1 == 0 & source_Lenta == 0 &
        source_Gazeta == 0 & source_TASS == 0 & source_Zvezda == 0 &
        source_NTV == 0 & source_RIA == 0 & source_Izvestia == 0 &
        source_RenTV == 0 & source_RT == 0 & source_Vzglyad == 0 &
        source_RG == 0 & source_Sputnik == 0 &
        source_KP == 0 & source_MK == 0 ~ 0
    ),
    # whether respondent uses any state-run (state-owned) media
    source_state_run_dummy = case_when(
      no_sources_chosen == 1 ~ NA_real_,
      no_sources_chosen == 0 &
        (source_Russia_1_24 == 1 | source_Vesti == 1 |
           source_TV1 == 1 | source_RIA == 1 |
           source_RT == 1 | source_RG == 1 | source_Sputnik == 1 |
           source_TASS == 1 | source_Zvezda == 1) ~ 1,
      # source_RG is part of the 1-branch, so it has to be checked here too;
      # otherwise a missing source_RG would be coded 0 instead of NA
      no_sources_chosen == 0 &
        source_Russia_1_24 == 0 & source_Vesti == 0 &
        source_TV1 == 0 & source_RIA == 0 &
        source_RT == 0 & source_RG == 0 & source_Sputnik == 0 &
        source_TASS == 0 & source_Zvezda == 0 ~ 0
    ),
    # whether respondent uses any independent media
    source_independent_dummy = case_when(
      no_sources_chosen == 1 ~ NA_real_,
      no_sources_chosen == 0 &
        (source_Rain == 1 | source_BBC == 1 | source_Open_Media == 1 |
           source_Echo == 1 | source_Meduza == 1 |
           source_Novaya_Gazeta == 1 | source_Vedomosti == 1 |
           source_Rosbalt == 1 | source_Euronews == 1 |
           source_Mediazona == 1 | source_foreign == 1) ~ 1,
      no_sources_chosen == 0 &
        source_Rain == 0 & source_BBC == 0 & source_Open_Media == 0 &
        source_Echo == 0 & source_Meduza == 0 & source_Novaya_Gazeta == 0 &
        source_Vedomosti == 0 & source_Rosbalt == 0 & source_Euronews == 0 &
        source_Mediazona == 0 & source_foreign == 0 ~ 0
    ),
    # whether respondent uses any of four state TV channels
    # NOTE(review): unlike the dummies above, this one is not conditioned on
    # no_sources_chosen -- confirm that this is intended
    source_state_tv = case_when(
      source_Russia_1_24 == 1 | source_TV1 == 1 |
        source_Vesti == 1 | source_Zvezda == 1 ~ 1,
      source_Russia_1_24 == 0 & source_TV1 == 0 &
        source_Vesti == 0 & source_Zvezda == 0 ~ 0
    ),
    # text label for the binary education variable (NA for any other value)
    education_dummy = case_when(
      education == 1 ~ "Higher ed.",
      education == 0 ~ "Less than higher"
    ),
    # presidential approval: codes 1-2 (disapprove) -> 0, 3-4 (approve) -> 1
    pres_approval_dummy = case_when(
      pres_approval %in% 1:2 ~ 0,
      pres_approval %in% 3:4 ~ 1
    ),
    # the same four codes as an ordered-by-level labelled factor
    pres_approval_cat = case_when(
      pres_approval == 1 ~ "Certainly disapprove",
      pres_approval == 2 ~ "Somewhat disapprove",
      pres_approval == 3 ~ "Somewhat approve",
      pres_approval == 4 ~ "Certainly approve"
    ),
    pres_approval_cat = factor(
      pres_approval_cat,
      levels = c("Certainly disapprove", "Somewhat disapprove",
                 "Somewhat approve", "Certainly approve")
    ),
    # create a dummy for whether one is proud of the annexation of Crimea
    # (branches are evaluated top to bottom; the first match wins)
    pride_history_crimea = case_when(
      pride_history == "nothing" ~ 0,
      # set to NA if "none of these" is chosen as an option along with other
      # choices (these are most likely random clicks)
      pride_history != "nothing" &
        str_detect(pride_history, "nothing") ~ NA_real_,
      !str_detect(pride_history, "nothing") &
        !str_detect(pride_history, "crimea") ~ 0,
      str_detect(pride_history, "crimea") ~ 1
    ),
    pride_history_crimea_cat = case_when(
      pride_history_crimea == 1 ~ "Proud of Crimea annexation",
      pride_history_crimea == 0 ~ "Not proud of Crimea annexation"
    ),
    pride_history_crimea_cat = factor(
      pride_history_crimea_cat,
      levels = c("Not proud of Crimea annexation",
                 "Proud of Crimea annexation")
    ),
    # variable reflecting beliefs about Ukraine and EU sanctions
    # based on two stories in the survey
    EU_Ukr_beliefs = case_when(
      EU_Ukr_stories_total == 2 ~ "Pro-regime",
      EU_Ukr_stories_total == 1 ~ "In-between",
      EU_Ukr_stories_total == 0 ~ "Critical"
    ),
    # no explicit levels: factor() orders the levels by sorting the labels
    EU_Ukr_beliefs = factor(EU_Ukr_beliefs)
  )

# add a dummy for those who took quiz 2:
# 1 when the largest `set` value recorded for a respondent equals 2, else 0
resp_q2 <- main_stories_data |>
  distinct(session_id, set) |>
  group_by(session_id) |>
  summarise(took_quiz_2 = if_else(max(set) == 2, 1, 0), .groups = "drop")

# one row per session_id, so the join adds a column without adding rows
main_resp <- left_join(main_resp, resp_q2)
rm(resp_q2)

# additional variables/cleanup at the story level
# Respondent-level variables are merged onto every story evaluation first.
# No `by` is given, so the join uses all column names the tables share.
main_stories_data <- main_stories_data |>
  left_join(main_resp) |>
  mutate(
    # treatment variables, three alternative definitions of state media;
    # all three are derived from the raw outlet codes in story_source, so
    # they must be computed before story_source is relabelled below

    # state media definition 1: all state-controlled outlets (RBC separate)
    state_controlled = factor(
      case_when(
        story_source == "No source" ~ "No source",
        story_source %in% c("rain", "echo", "meduza") ~ "Critical",
        story_source %in% c("tv1", "russia24", "rt", "ria", "kp") ~
          "State-controlled",
        story_source == "rbc" ~ "RBC"
      ),
      levels = c("No source", "Critical", "State-controlled", "RBC")
    ),
    # state media definition 2: all state-controlled outlets + RBC
    state_controlled_RBC = factor(
      case_when(
        story_source == "No source" ~ "No source",
        story_source %in% c("rain", "echo", "meduza") ~ "Critical",
        story_source %in% c("tv1", "russia24", "rt", "ria", "kp", "rbc") ~
          "State-controlled"
      ),
      levels = c("No source", "Critical", "State-controlled")
    ),
    # state media definition 3: only state-owned outlets
    state_run = factor(
      case_when(
        story_source == "No source" ~ "No source",
        story_source %in% c("rain", "echo", "meduza") ~ "Critical",
        story_source %in% c("tv1", "russia24", "rt", "ria") ~ "State-run",
        story_source %in% c("kp", "rbc") ~ "Other"
      ),
      levels = c("No source", "Critical", "State-run", "Other")
    ),
    # clean treatment outlet names: each raw code in `levels` is relabelled
    # with the entry at the same position in `labels`; any other value -> NA
    story_source = factor(
      story_source,
      levels = c("No source", "meduza", "rain", "echo", "rbc",
                 "tv1", "russia24", "rt", "ria", "kp"),
      labels = c("No source", "Meduza", "Rain", "Echo of Moscow", "RBC",
                 "Channel One", "Russia-24", "RT", "RIA", "KP")
    ),
    # story identifier as a factor
    story_label = as.factor(story_code)
  )

# CLEANUP -----------
main_stories <- main_stories_data |>
  # drop straightliners (those who answered uniformly to all the questions),
  # then discard the two flag columns
  filter(all_labeled_fake == 0 & all_labeled_true == 0) |>
  select(-all_labeled_fake, -all_labeled_true) |>
  # keep respondents who have not taken the quiz before
  # (a missing `taken` value is kept as well)
  filter(is.na(taken) | taken == 0) |>
  # keep evaluations with story_time strictly above 1
  # (rows with missing story_time are dropped by filter() too)
  filter(story_time > 1) |>
  # pool days with few responses (for the use of story fixed effects):
  # four listed days and everything from 2020-06-19 onwards share one label
  mutate(survey_day = case_when(
    survey_day %in% c("2020-05-26", "2020-05-27",
                      "2020-06-01", "2020-06-12") ~ "2020-06-19",
    ymd(survey_day) >= ymd("2020-06-19") ~ "2020-06-19",
    TRUE ~ survey_day
  ))

